# exerc3.py
import jieba
from collections import Counter


def count_novel_words(file_path, top_n=20, stopwords_path='cn_stopwords.txt'):
    """Count word frequencies in a Chinese novel text file.

    Reads the file, segments it with jieba, filters out single-character
    tokens and stopwords, prints the ``top_n`` most frequent words, and
    returns the full frequency table.

    Args:
        file_path: Path to the UTF-8 encoded novel text file.
        top_n: Number of most frequent words to print (default 20).
        stopwords_path: Path to a UTF-8 stopword list, one word per line
            (default 'cn_stopwords.txt', resolved against the working
            directory).

    Returns:
        A collections.Counter mapping word -> count, or None if an error
        occurred (the error is printed, not raised).
    """
    try:
        # Read the whole novel into memory at once.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Segment the text into words with jieba.
        words = jieba.lcut(content)

        # Load the stopword list: one word per line, whitespace stripped.
        with open(stopwords_path, 'r', encoding='utf-8') as f:
            stopwords = {line.strip() for line in f}

        # Drop single-character tokens (mostly punctuation and particles)
        # and any word on the stopword list.
        filtered_words = [
            word for word in words
            if len(word) > 1 and word not in stopwords
        ]

        # Tally word frequencies.
        word_counts = Counter(filtered_words)

        # Print the top_n most common words.
        print(f"出现频率最高的{top_n}个词：")
        for word, count in word_counts.most_common(top_n):
            print(f"{word}: {count}次")

        return word_counts

    except FileNotFoundError as e:
        # Either the novel or the stopword file may be missing; report the
        # file that actually failed to open rather than always file_path.
        print(f"错误：文件{e.filename}未找到")
        return None
    except Exception as e:
        # Top-level boundary for a script: report the error and give up.
        print(f"发生错误：{str(e)}")
        return None


if __name__ == "__main__":
    # 使用示例
    novel_path = '三国演义.txt'  # 确保文件在相同目录
    count_novel_words(novel_path)